Python Frequently-Asked Questions v' + version print "

#!/usr/local/bin/python # A somewhat-generalized FAQ-to-HTML converter (by Ka-Ping Yee, 10 Sept 96) # Reads a text file given on standard input or named as first argument, and # generates HTML 2.0 on standard output. Recognizes these constructions: # # HTML element pattern at the beginning of a line # # section heading ()+ # numbered list element <1-2 spaces>()+ # unnumbered list element <0-2 spaces> # preformatted section # # Heading level is determined by the number of () segments. # Blank lines force a separation of elements; if none of the above four # types is indicated, a new paragraph begins. A line beginning with many # spaces is interpreted as a continuation (instead of preformatted) after # a list element. Headings are anchored; paragraphs starting with "Q." are # emphasized, and those marked with "A." get their first sentence emphasized. # # Hyperlinks are created from references to: # URLs, explicitly marked using # other questions, of the form "question ()*" # sections, of the form "section ". import sys, string, regex, regsub, regex_syntax regex.set_syntax(regex_syntax.RE_SYNTAX_AWK) # --------------------------------------------------------- regular expressions orditemprog = regex.compile(' ?([1-9][0-9]*\.)+ +') itemprog = regex.compile(' ? ?[-*] +') headingprog = regex.compile('([1-9][0-9]*\.)+ +') prefmtprog = regex.compile(' ') blankprog = regex.compile('^[ \t\r\n]$') questionprog = regex.compile(' *Q\. +') answerprog = regex.compile(' *A\. +') sentprog = regex.compile('(([^.:;?!(]|[.:;?!][^ \t\r\n])+[.:;?!]?)') mailhdrprog = regex.compile('^(Subject|Newsgroups|Followup-To|From|Reply-To' '|Approved|Archive-Name|Version|Last-Modified): +', regex.casefold) urlprog = regex.compile('<URL:([^&]+)>') addrprog = regex.compile('<([^>@:]+@[^&@:]+)>') qrefprog = regex.compile('question +([1-9](\.[0-9]+)*)') srefprog = regex.compile('section +([1-9][0-9]*)') entityprog = regex.compile('[&<>]') # ------------------------------------------------------------ global variables body = [] ollev = ullev = 0 element = content = secnum = version = '' # ----------------------------------------------------- for making nested lists def dnol(): global body, ollev ollev = ollev + 1 if body[-1] == '': del body[-1] body.append('

') def upol(): global body, ollev ollev = ollev - 1 body.append(ollev and '' or '') # --------------------------------- output one element and convert its contents def spew(clearol=0, clearul=0): global content, body, ollev, ullev if content: if entityprog.search(content) > -1: content = regsub.gsub('&', '&', content) content = regsub.gsub('<', '<', content) content = regsub.gsub('>', '>', content) n = questionprog.match(content) if n > 0: content = '' + content[n:] + '' if ollev: # question reference in index fragid = regsub.gsub('^ +|\.? +$', '', secnum) content = '%s' % (fragid, content) if element[0] == 'h': # heading in the main text fragid = regsub.gsub('^ +|\.? +$', '', secnum) content = secnum + '%s' % (fragid, content) n = answerprog.match(content) if n > 0: # answer paragraph content = regsub.sub(sentprog, '\\1', content[n:]) body.append('<' + element + '>' + content) body.append('') content = '' while clearol and ollev: upol() if clearul and ullev: body.append(''); ullev = 0 # ---------------------------------------------------------------- main program faq = len(sys.argv)>1 and sys.argv[1] and open(sys.argv[1]) or sys.stdin lines = faq.readlines() for line in lines: if line[2:9] == '=======': #

will appear *before* body.append('

') # the underlined heading continue n = orditemprog.match(line) if n > 0: # make ordered list item spew(0, 'clear ul') secnum = line[:n] level = string.count(secnum, '.') while level > ollev: dnol() while level < ollev: upol() element, content = 'li', line[n:] continue n = itemprog.match(line) if n > 0: # make unordered list item spew('clear ol', 0) if ullev == 0: body.append('

\\1

question \\1

section \\1

Python Frequently-Asked Questions v' + version print "

faq2html.py